* Start from an empty session and switch off output paging.
clear all
set more off

* Sample switch: 0 creates the main IV; 1 creates the IV for married only.
local flag_married_only 0

* In this file people are grouped into a_inclass (default: by age and education),
* and recreation price growth rates and the IV are constructed.
* Figure switch: needs to be 0 to get correct price IVs and consumption growth.
local flag_figures 1

* Grouping used when flag_figures is 1:
*   educ=1 and age=0 : split by education only (fig 11)
*   educ=0 and age=1 : split by age only
*   educ=1 and age=1 : double split (fig 6)
local flag_figures_educ 1
local flag_figures_age 1

**************************************************************************
*********************PRELIMINARIES****************************************
**************************************************************************
	
* Load the processed file cex_processed.
use cex_processed, clear

* Attach the recreation price series by year (many data rows per price row).
* The _merge variable created here is left in the data.
merge m:1 year using "../US_prices_4_regions/prices_US_rec_detailed"

* Three periods:
*   period 0 : years used to compute consumption shares (for the IV)
*   period 1 : initial window for growth rates
*   period 2 : final window for growth rates
* Only the first year of the final window depends on flag_figures.
local start_year_share0 1980
local finish_year_share0 1988
local start_year_price0 1989
local finish_year_price0 1991
local finish_year1 2018
if `flag_figures'==1 {
	local start_year1 2010
}
else {
	local start_year1 2014
}

* Years outside all three windows keep a missing period.
gen period=.
replace period=0 if inrange(year, `start_year_share0', `finish_year_share0')
replace period=1 if inrange(year, `start_year_price0', `finish_year_price0')
replace period=2 if inrange(year, `start_year1', `finish_year1')

* Measure of hours: inc_hrs1*incweek1 plus inc_hrs2*incweek2.
* Disabled alternative (hours of reference person only):
*gen hours=inc_hrs1*incweek1
gen hours=inc_hrs1*incweek1+inc_hrs2*incweek2

* Measure of wage: nominal wage bill and the same deflated by price_CPI.
gen wage_bill=fsalaryx
gen wage_bill_r=wage_bill/price_CPI

* Delete inconsistent observations: zero wage bill with positive hours, or zero hours with positive wage bill.
* NOTE(review): Stata treats a missing value as larger than any number, so a missing hours or
* wage_bill also passes the greater-than-zero tests below - confirm this is intended.
drop if (wage_bill==0 & hours>0) | (hours==0 & wage_bill>0)
	
	

* Education class (1 to 5), built from the string codes educ_ref_1d and educ_ref_2d.
* The one-digit code is applied first; wherever the two-digit code matches, it overrides.
gen a_inclass_educ=.

* one-digit code
replace a_inclass_educ=1 if inlist(educ_ref_1d, "1", "2", "7")	/*HS, no diploma*/
replace a_inclass_educ=2 if educ_ref_1d=="3"	/*HS grad*/
replace a_inclass_educ=3 if educ_ref_1d=="4"	/*some college*/
replace a_inclass_educ=4 if educ_ref_1d=="5"	/*college grad*/
replace a_inclass_educ=5 if educ_ref_1d=="6"	/*more than college*/

* two-digit code
replace a_inclass_educ=1 if inlist(educ_ref_2d, "0", "10", "11")
replace a_inclass_educ=2 if educ_ref_2d=="12"
replace a_inclass_educ=3 if educ_ref_2d=="13"
replace a_inclass_educ=4 if inlist(educ_ref_2d, "14", "15")
replace a_inclass_educ=5 if inlist(educ_ref_2d, "16", "17")

* Age class: 1 for ages 25 up to 35, 2 for 35 up to 50, 3 for 50 up to 65; missing otherwise.
gen a_inclass_age=1+(age>=35)+(age>=50) if age>=25 & age<65

* Keep only observations that fall into both an age class and an education class.
drop if missing(a_inclass_age) | missing(a_inclass_educ)

* Optional restriction to observations with marital1 equal to 1.
if `flag_married_only'==1 {
	keep if marital1==1
}







* Default grouping: one group per combination of age class and education class.
egen long a_inclass=group(a_inclass_age a_inclass_educ)

* For the figures the grouping can be reduced to a single dimension.
* max_a_inclass is the group index used later for the highest group in the charts.
if `flag_figures'==1 {
	if `flag_figures_educ'==1 & `flag_figures_age'==0 {
		* education only
		drop a_inclass
		gen a_inclass=a_inclass_educ
		local max_a_inclass 5
	}
	else if `flag_figures_educ'==0 & `flag_figures_age'==1 {
		* age only
		drop a_inclass
		gen a_inclass=a_inclass_age
		local max_a_inclass 3
	}
	else if `flag_figures_educ'==1 & `flag_figures_age'==1 {
		* age by education: the default grouping is kept
		local max_a_inclass 15
	}
}

drop if missing(a_inclass)

* From now on no observations are deleted. Rescale weights so that they sum to 1 within each year,
* and record the number of observations in each year and group.
sort year a_inclass
by year a_inclass: egen N_a_inclass=count(year)
by year: egen tot_weight=total(nweight)
replace nweight=nweight/tot_weight
drop tot_weight

save TEMP_prelim, replace





	
**************************************************************************
*****STEP 1: RECREATION PRICES, CONSUMPTION AND HOURS BY INCOME GROUPS****
**************************************************************************	
	
clear all
use TEMP_prelim, clear

* Group-specific recreation price index: set to 100 in 1980 and left missing for all other years at this point.
gen price_rec_r_a_inclass=.
replace price_rec_r_a_inclass=100 if year==1980
sort a_inclass year

* Disabled: drop rec_sport rec_photo rec_oth_goods rec_oth_serv rec_audio_video rec_pets rec_reading

* Overall recreation spending is the sum of the seven categories.
gen rec_overall=rec_sport+rec_photo+rec_oth_goods+rec_oth_serv+rec_audio_video+rec_pets+rec_reading

* Weighted totals by group and year: each recreation category, overall recreation, and total expenditure.
local list sport photo oth_goods oth_serv audio_video pets reading overall
foreach var of local list {
	by a_inclass year: egen rec_`var'_a_inclass=total(rec_`var'*nweight)
}
by a_inclass year: egen totexp_a_inclass=total(totexp*nweight)

* Fraction of each category in the recreation spending of the group and year.
local list sport photo oth_goods oth_serv audio_video pets reading
foreach var of local list {
	gen frac_rec_`var'_a_inclass=rec_`var'_a_inclass/rec_overall_a_inclass
}

* Weighted hours, total weight and real wage bill by group and year.
by a_inclass year: egen hours_a_inclass=total(hours*nweight)
by a_inclass year: egen weight_a_inclass=total(nweight)
by a_inclass year: egen wage_bill_r_a_inclass=total(wage_bill_r*nweight)

* The real wage uses total hours, so it must be computed before hours are turned into a per capita figure.
gen wage_r_a_inclass=wage_bill_r_a_inclass/hours_a_inclass
replace hours_a_inclass=hours_a_inclass/weight_a_inclass /*per capita hours*/

* Keep one row per group and year.
sort a_inclass year
by a_inclass year: keep if _n==1


* Recreation price by group, built from growth contributions: the index starts at 100 in 1980,
* and each later year multiplies the previous value by the sum over categories of
* (fraction of the category in the recreation basket of the group) x (gross price growth of the category).
sort a_inclass year
tempvar growth_sum
gen `growth_sum'=0
foreach item in sport photo oth_goods oth_serv audio_video pets reading {
	by a_inclass: gen contrib_`item'=frac_rec_`item'_a_inclass*(price_`item'_r/price_`item'_r[_n-1])
	replace `growth_sum'=`growth_sum'+contrib_`item'
}

* replace proceeds row by row, so a value filled in for one year is available for the next year of the same group
by a_inclass: replace price_rec_r_a_inclass=price_rec_r_a_inclass[_n-1]*`growth_sum' if missing(price_rec_r_a_inclass)
drop `growth_sum'

* Real per capita recreation consumption; note the additional division by the real group-specific price of recreation.
gen cons_rec_r_a_inclass=rec_overall_a_inclass/weight_a_inclass/price_CPI/price_rec_r_a_inclass
* Real per capita non-recreation consumption.
gen cons_nonrec_r_a_inclass=(totexp_a_inclass-rec_overall_a_inclass)/weight_a_inclass/price_CPI

keep period year a_inclass* price_rec_r_a_inclass hours_a_inclass cons_rec_r_a_inclass cons_nonrec_r_a_inclass frac_rec* rec* wage_r_a_inclass






* Aggregate over the three periods (period 0 for the pre-existing shares, then initial and final).
drop if missing(period)
drop year
sort a_inclass period

* Period totals of recreation spending, by category and overall.
foreach item in sport photo oth_goods oth_serv audio_video pets reading overall {
	by a_inclass period: egen rec_`item'_a_inclass_period=total(rec_`item'_a_inclass)
}

* The seven categories without the overall total; this macro is read again further down the file.
local list sport photo oth_goods oth_serv audio_video pets reading

* Recreation spending is not divided by a price index here, so more recent observations mechanically
* receive larger weights. This is fine since the goal is to construct an IV, and it makes the IV stronger.
foreach var of local list {
	gen share_`var'_a_inclass=rec_`var'_a_inclass_period/rec_overall_a_inclass_period
}

* Period means of the price index, hours and the two consumption measures.
foreach series in price_rec_r hours cons_rec_r cons_nonrec_r {
	by a_inclass period: egen `series'_a_inclass_mean=mean(`series'_a_inclass)
}

* Keep one row per group and period.
by a_inclass period: keep if _n==1

* Log growth of each period mean relative to the previous period of the same group.
foreach series in price_rec_r hours {
	by a_inclass: gen gr_`series'_a_inclass=log(`series'_a_inclass_mean/`series'_a_inclass_mean[_n-1])
}
foreach series in cons_rec_r cons_nonrec_r {
	by a_inclass: gen gr_`series'_a_inclass_mean=log(`series'_a_inclass_mean/`series'_a_inclass_mean[_n-1])
}

keep gr* period a_inclass* share*


if `flag_figures'==1 {

	preserve

		* express shares in percent for plotting
		foreach var of local list {
			replace share_`var'_a_inclass=share_`var'_a_inclass*100
		}

		drop gr*

		* long format: one row per group, period and recreation category
		reshape long share, i(a_inclass period) j(item) string
		sort period a_inclass item

		* One bar chart per colour scheme, period and group, drawn in this order:
		* default scheme first, then monochrome; within a scheme period 0 before period 2;
		* within a period group 1 before group max_a_inclass.
		* Charts are exported only for the education-only split and for the double split.
		forvalues mono=0/1 {
			local sfx ""
			local scheme_opt ""
			if `mono'==1 {
				local sfx "_bw"
				local scheme_opt "scheme(s1mono)"
			}

			foreach per in 0 2 {
				local when "early"
				if `per'==2 {
					local when "late"
				}

				forvalues top=0/1 {
					local grp 1
					local edutag "less_than_HS"
					local agetag "young"
					if `top'==1 {
						local grp `max_a_inclass'
						local edutag "more_than_college"
						local agetag "old"
					}

					graph bar share if period==`per' & a_inclass==`grp', /*
					*/ over(item, relabel (1 "Audio-video" 2 "Oth. goods" 3 "Oth. services" 4 "Pets" 5 "Photo" 6 "Reading" 7 "Sports") label(angle(45))) /*
					*/ bgcolor(white) graphregion(color(white)) plotregion(lcolor(black) lwidth(medthin)) ysize(4) ytitle("Fraction of total recreation consumption [%]") /*
					*/ ylabel(0[10]60, gmin gmax) `scheme_opt'

					if `flag_figures_educ'==1 & `flag_figures_age'==0 {
						graph export "../figures/rec_category_`edutag'_`when'`sfx'.png", as(png) replace
						graph export "../figures/rec_category_`edutag'_`when'`sfx'.eps", as(eps) replace
					}
					if `flag_figures_age'==1 & `flag_figures_educ'==1 {
						graph export "../figures/rec_category_`agetag'_`edutag'_`when'`sfx'.png", as(png) replace
						graph export "../figures/rec_category_`agetag'_`edutag'_`when'`sfx'.eps", as(eps) replace
					}
				}
			}
		}
	restore

}



* Growth in prices, hours and consumption by a_inclass, taken from the period 2 rows.
preserve
	keep if period==2
	keep gr* a_inclass*
	save TEMP_gr_price_hours, replace
restore

* Initial recreation shares by a_inclass: the period 0 shares, renamed with an init_ prefix.
preserve
	keep if period==0
	keep share* a_inclass*
	rename share* init_share*
	save TEMP_init_rec_shares, replace
restore



	
*****************************************************************************
**STEP 2: PRICE GROWTH FOR DIFFERENT RECREATION GOODS AT THE NATIONAL LEVEL**
*****************************************************************************
		
clear all
use TEMP_prelim, clear

sort period
drop if missing(period)

* Replace each category price by its mean within the period.
* NOTE(review): the mean is taken over all rows of TEMP_prelim in the period, so years with
* more rows weigh more - confirm this is intended.
local list sport photo oth_goods oth_serv audio_video pets reading
foreach var of local list {
	by period: egen price_`var'_r_mean=mean(price_`var'_r)
	drop price_`var'_r
	rename price_`var'_r_mean price_`var'_r
}

keep price* period
drop price_CPI

* Keep one row per period.
by period: keep if _n==1

sort period

* Gross price growth relative to the previous period. No log here: the log is applied later,
* after the categories are combined (because price growth might be very large).
foreach var of local list {
	gen gr_price_`var'_r_US=(price_`var'_r/price_`var'_r[_n-1])
}

* Keep only growth between periods 1 and 2.
keep if period==2

keep gr_price*

save TEMP_gr_price_USA, replace



******************************************************************************
*****************STEP 3: BARTIK IV FOR RECREATION PRICES**********************
******************************************************************************		
clear all

* Merge three datasets: growth in prices and hours, initial recreation shares,
* and nation-wide price growth by category.
use TEMP_gr_price_hours, clear

merge 1:1 a_inclass using TEMP_init_rec_shares
drop _merge

* The nation-wide file holds a single row; merging on row number places it on the first row.
merge 1:1 _n using TEMP_gr_price_USA
drop _merge

erase TEMP_prelim.dta

* Copy the nation-wide growth rates from the first row down to every a_inclass row.
local list sport photo oth_goods oth_serv audio_video pets reading
foreach var of local list {
	replace gr_price_`var'_r_US=gr_price_`var'_r_US[_n-1] if gr_price_`var'_r_US==.
}

* Compute the IV: initial-share-weighted sum of nation-wide gross price growth, then take logs.
gen gr_price_rec_r_a_inclass_IV=0
foreach var of local list {
	replace gr_price_rec_r_a_inclass_IV=gr_price_rec_r_a_inclass_IV+gr_price_`var'_r_US*init_share_`var'_a_inclass
}
replace gr_price_rec_r_a_inclass_IV=log(gr_price_rec_r_a_inclass_IV)

* Variables are referred to by their full names so that nothing depends on name abbreviation being enabled.
keep gr_price_rec_r_a_inclass gr_price_rec_r_a_inclass_IV gr_cons* a_inclass* gr_hours_a_inclass

* Short names for the output variables.
* NOTE(review): y_h is created here but is not in the final keep list, so it is not saved - confirm this is intended.
gen y_h=gr_hours_a_inclass
gen x_p=gr_price_rec_r_a_inclass
gen x_p_IV=gr_price_rec_r_a_inclass_IV
gen y_c=gr_cons_nonrec_r_a_inclass_mean
gen y_d=gr_cons_rec_r_a_inclass_mean

* Pause for one second, then remove the intermediate files; errors from erase are ignored.
sleep 1000
cap erase TEMP_gr_price_USA.dta
cap erase TEMP_init_rec_shares.dta
cap erase TEMP_gr_price_hours.dta

rename a_inclass income_ptf
keep income_ptf x_p x_p_IV y_c y_d

* Results are saved only when the figure switch is off; the file name depends on the sample switch.
if `flag_figures'==0 {
	if `flag_married_only'==0 {
		save rec_price_IV, replace
	}

	if `flag_married_only'==1 {
		save rec_price_IV_married_only, replace
	}
}

cap erase tempp.dta
